Import libraries
from IPython.display import display, Markdown
import plotly.express as px
from sem_covid.services.data_registry import Dataset
from sem_covid.entrypoints.notebooks.EDA.eda_wrangling.categorical_analyze import fast_categorical_analyze
from sem_covid.entrypoints.notebooks.EDA.eda_wrangling.confidence_interval_analysis import (
confidence_interval_with_mean, z_score_for_series, confidence_interval_for_proportion)
from sem_covid.entrypoints.notebooks.EDA.eda_wrangling.collision_analysis import (class_collision_in_columns,
class_collision)
# Fetch the EU Cellar dataset through the project's data registry.
eurlex = Dataset.EU_CELLAR.fetch()

# Columns handed to the categorical analysis below.
CATEGORICAL_COLUMNS = [
    'resource_type_labels',
    'eurovoc_concept_labels',
    'subject_matter_labels',
    'directory_codes_labels',
    'author_labels',
    'internal_comments',
]
100% (9792 of 9792) |####################| Elapsed Time: 0:01:04 Time: 0:01:04
# Run the categorical analysis on the selected columns. The cells below look
# the result up by key (one entry per analysed column, judging by the output).
eda_result = fast_categorical_analyze(
    eurlex,
    CATEGORICAL_COLUMNS,
    "Eurlex Dataset",
)
| | index | Absolute freq |
|---|---|---|
| 0 | title | 714 |
| 1 | cdm_type_labels | 9792 |
| 2 | subject_matters | 6488 |
| 3 | subject_matter_labels | 6488 |
| 4 | directory_codes | 6419 |
| 5 | directory_codes_labels | 6419 |
| 6 | celex_numbers | 5206 |
| 7 | legal_elis | 8476 |
| 8 | authors | 5050 |
| 9 | author_labels | 5050 |
| 10 | full_ojs | 6373 |
| 11 | oj_sectors | 5050 |
| 12 | internal_comments | 6722 |
| 13 | is_in_force | 8927 |
| 14 | dates_created | 9792 |
| 15 | legal_dates_entry_into_force | 8924 |
| 16 | legal_dates_signature | 9776 |
| 17 | manifs_pdf | 880 |
| 18 | manifs_html | 5576 |
| 19 | content_path | 856 |
| 20 | content | 986 |
| 21 | failure_reason | 9788 |
| 22 | language | 8006 |
| | resource_type_labels | Relative freq |
|---|---|---|
| 0 | General publications | 41.48 |
| 1 | Study | 11.45 |
| 2 | Judicial information | 5.81 |
| 3 | Notice | 4.75 |
| 4 | Corrigendum | 4.67 |
| 5 | Regulation | 2.77 |
| 6 | Staff working document | 2.29 |
| 7 | Executive summary of a study | 2.19 |
| 8 | Decision | 2.15 |
| 9 | Implementing regulation | 2.04 |
| | eurovoc_concept_labels | Relative freq |
|---|---|---|
| 0 | epidemic | 12.97 |
| 1 | coronavirus disease | 10.62 |
| 2 | innovation | 9.78 |
| 3 | infectious disease | 6.49 |
| 4 | public health | 4.36 |
| 5 | air transport | 4.15 |
| 6 | labour market | 3.98 |
| 7 | research and development | 3.90 |
| 8 | health risk | 3.48 |
| 9 | economic consequence | 3.06 |
| | subject_matter_labels | Relative freq |
|---|---|---|
| 0 | Transport | 9.46 |
| 1 | Public health | 7.95 |
| 2 | Economic policy | 4.55 |
| 3 | Internal market - Principles | 4.45 |
| 4 | External relations | 3.88 |
| 5 | Environment | 3.63 |
| 6 | State aids | 3.50 |
| 7 | Economic and Monetary Union | 3.47 |
| 8 | European Free Trade Association (EFTA) | 3.08 |
| 9 | Approximation of laws | 2.72 |
| | directory_codes_labels | Relative freq |
|---|---|---|
| 0 | Health protection | 5.09 |
| 1 | State aids and other subsidies | 4.46 |
| 2 | Economic and monetary union | 4.28 |
| 3 | Member countries of the European Free Trade As... | 3.96 |
| 4 | Conventions with non-member countries | 3.38 |
| 5 | Protection of health and safety | 3.23 |
| 6 | Motor vehicles | 2.94 |
| 7 | Technical and safety conditions | 2.84 |
| 8 | Employment and unemployment | 2.84 |
| 9 | Budget | 2.65 |
| | author_labels | Relative freq |
|---|---|---|
| 0 | European Commission | 34.42 |
| 1 | Council of the European Union | 9.77 |
| 2 | Court of Justice | 6.09 |
| 3 | European Parliament | 5.34 |
| 4 | Secretariat-General | 3.97 |
| 5 | Directorate-General for Health and Food Safety | 3.85 |
| 6 | Directorate-General for Mobility and Transport | 3.71 |
| 7 | General Court | 3.03 |
| 8 | Directorate-General for Translation | 2.52 |
| 9 | Directorate-General for Competition | 2.32 |
| | internal_comments | Relative freq |
|---|---|---|
| 0 | MAN2 | 58.84 |
| 1 | COVID19 | 39.27 |
| 2 | BREXIT | 1.54 |
| 3 | MAN2, COVID19 | 0.21 |
| 4 | 552 | 0.05 |
| 5 | COVID-19 | 0.05 |
| 6 | COOVID19 | 0.03 |
# For every entry of eda_result: extend a copy of its frequency table with a
# z-score, a running total and a row-to-row difference of the frequency
# column (second column), then show the table and three bar charts.
cumulative_freq = 'Cumulative freq'
diff_freq = 'Diff freq'
for key, frequencies in eda_result.items():
    data = frequencies.copy()
    column_name = data.columns[1]
    z_score_column = data.columns[0] + '_z_score'

    # Read the frequency series and its spread once; both are reused below.
    freq_values = data[column_name]
    freq_std = freq_values.std()

    data[z_score_column] = round((freq_values - freq_values.mean()) / freq_std, 2)
    data[cumulative_freq] = freq_values.cumsum()
    data[diff_freq] = freq_values.diff()

    display(Markdown(f"Std deviation for [{key}] is [{round(freq_std, 2)}]"))
    display(data)

    # Charts are addressed by column position, exactly as the table is laid
    # out after the three assignments above.
    plot_columns = data.columns
    for x_axis, y_axis in (
        (plot_columns[2], plot_columns[0]),
        (plot_columns[0], plot_columns[3]),
        (plot_columns[0], plot_columns[4]),
    ):
        px.bar(data, x=x_axis, y=y_axis).show()
Std deviation for [resource_type_labels] is [5.19]
| | resource_type_labels | Relative freq | resource_type_labels_z_score | Cumulative freq | Diff freq |
|---|---|---|---|---|---|
| 0 | General publications | 41.48 | 7.71 | 41.48 | NaN |
| 1 | Study | 11.45 | 1.93 | 52.93 | -30.03 |
| 2 | Judicial information | 5.81 | 0.84 | 58.74 | -5.64 |
| 3 | Notice | 4.75 | 0.64 | 63.49 | -1.06 |
| 4 | Corrigendum | 4.67 | 0.62 | 68.16 | -0.08 |
| ... | ... | ... | ... | ... | ... |
| 64 | Annual report | 0.02 | -0.28 | 99.96 | 0.00 |
| 65 | Draft implementing decision | 0.02 | -0.28 | 99.98 | 0.00 |
| 66 | Proposal for an implementing regulation | 0.02 | -0.28 | 100.00 | 0.00 |
| 67 | Executive summary of the fitness check | 0.02 | -0.28 | 100.02 | 0.00 |
| 68 | Green Paper | 0.02 | -0.28 | 100.04 | 0.00 |
69 rows × 5 columns
Std deviation for [eurovoc_concept_labels] is [1.49]
| | eurovoc_concept_labels | Relative freq | eurovoc_concept_labels_z_score | Cumulative freq | Diff freq |
|---|---|---|---|---|---|
| 0 | epidemic | 12.97 | 8.42 | 12.97 | NaN |
| 1 | coronavirus disease | 10.62 | 6.85 | 23.59 | -2.35 |
| 2 | innovation | 9.78 | 6.28 | 33.37 | -0.84 |
| 3 | infectious disease | 6.49 | 4.07 | 39.86 | -3.29 |
| 4 | public health | 4.36 | 2.64 | 44.22 | -2.13 |
| ... | ... | ... | ... | ... | ... |
| 228 | European Social Fund | 0.01 | -0.28 | 99.58 | 0.00 |
| 229 | European security | 0.01 | -0.28 | 99.59 | 0.00 |
| 230 | distribution of EU funding | 0.01 | -0.28 | 99.60 | 0.00 |
| 231 | external border of the EU | 0.01 | -0.28 | 99.61 | 0.00 |
| 232 | green economy | 0.01 | -0.28 | 99.62 | 0.00 |
233 rows × 5 columns
Std deviation for [subject_matter_labels] is [1.38]
| | subject_matter_labels | Relative freq | subject_matter_labels_z_score | Cumulative freq | Diff freq |
|---|---|---|---|---|---|
| 0 | Transport | 9.46 | 6.29 | 9.46 | NaN |
| 1 | Public health | 7.95 | 5.20 | 17.41 | -1.51 |
| 2 | Economic policy | 4.55 | 2.74 | 21.96 | -3.40 |
| 3 | Internal market - Principles | 4.45 | 2.67 | 26.41 | -0.10 |
| 4 | External relations | 3.88 | 2.26 | 30.29 | -0.57 |
| ... | ... | ... | ... | ... | ... |
| 128 | European Investment Bank (EIB) | 0.03 | -0.52 | 99.91 | 0.00 |
| 129 | Space | 0.03 | -0.52 | 99.94 | 0.00 |
| 130 | General provisions | 0.03 | -0.52 | 99.97 | 0.00 |
| 131 | Tourism | 0.03 | -0.52 | 100.00 | 0.00 |
| 132 | Administrative cooperation | 0.03 | -0.52 | 100.03 | 0.00 |
133 rows × 5 columns
Std deviation for [directory_codes_labels] is [0.86]
| | directory_codes_labels | Relative freq | directory_codes_labels_z_score | Cumulative freq | Diff freq |
|---|---|---|---|---|---|
| 0 | Health protection | 5.09 | 5.33 | 5.09 | NaN |
| 1 | State aids and other subsidies | 4.46 | 4.60 | 9.55 | -0.63 |
| 2 | Economic and monetary union | 4.28 | 4.39 | 13.83 | -0.18 |
| 3 | Member countries of the European Free Trade As... | 3.96 | 4.02 | 17.79 | -0.32 |
| 4 | Conventions with non-member countries | 3.38 | 3.34 | 21.17 | -0.58 |
| ... | ... | ... | ... | ... | ... |
| 193 | General provisions | 0.04 | -0.54 | 99.84 | 0.00 |
| 194 | Aeronautical industry | 0.04 | -0.54 | 99.88 | 0.00 |
| 195 | Aid to developing countries | 0.04 | -0.54 | 99.92 | 0.00 |
| 196 | Nuclear research | 0.02 | -0.56 | 99.94 | -0.02 |
| 197 | European countries | 0.02 | -0.56 | 99.96 | 0.00 |
198 rows × 5 columns
Std deviation for [author_labels] is [4.08]
| | author_labels | Relative freq | author_labels_z_score | Cumulative freq | Diff freq |
|---|---|---|---|---|---|
| 0 | European Commission | 34.42 | 8.14 | 34.42 | NaN |
| 1 | Council of the European Union | 9.77 | 2.09 | 44.19 | -24.65 |
| 2 | Court of Justice | 6.09 | 1.19 | 50.28 | -3.68 |
| 3 | European Parliament | 5.34 | 1.00 | 55.62 | -0.75 |
| 4 | Secretariat-General | 3.97 | 0.67 | 59.59 | -1.37 |
| ... | ... | ... | ... | ... | ... |
| 75 | European Union Intellectual Property Office | 0.03 | -0.30 | 99.99 | 0.00 |
| 76 | Single Resolution Board | 0.03 | -0.30 | 100.02 | 0.00 |
| 77 | Directorate-General for Structural Reform Support | 0.03 | -0.30 | 100.05 | 0.00 |
| 78 | Committee on the Internal Market and Consumer ... | 0.01 | -0.30 | 100.06 | -0.02 |
| 79 | Committee on Budgetary Control | 0.01 | -0.30 | 100.07 | 0.00 |
80 rows × 5 columns
Std deviation for [internal_comments] is [24.42]
| | internal_comments | Relative freq | internal_comments_z_score | Cumulative freq | Diff freq |
|---|---|---|---|---|---|
| 0 | MAN2 | 58.84 | 1.82 | 58.84 | NaN |
| 1 | COVID19 | 39.27 | 1.02 | 98.11 | -19.57 |
| 2 | BREXIT | 1.54 | -0.52 | 99.65 | -37.73 |
| 3 | MAN2, COVID19 | 0.21 | -0.58 | 99.86 | -1.33 |
| 4 | 552 | 0.05 | -0.58 | 99.91 | -0.16 |
| 5 | COVID-19 | 0.05 | -0.58 | 99.96 | 0.00 |
| 6 | COOVID19 | 0.03 | -0.58 | 99.99 | -0.02 |
# For every entry of eda_result: compute a confidence interval around the mean
# of its frequency column, annotate each row with its own interval and
# z-score, then show the rows that fall above, inside and below that mean
# interval.
#
# The previous version also assigned `n = data.size` and never used it; it was
# dropped (DataFrame.size is rows x columns, so it was not a sample size).
rel_f = 'Relative freq'
for key, frequencies in eda_result.items():
    data = frequencies.copy()

    # The helpers are fed the frequency column divided by 100 (presumably
    # turning percentages into fractions - confirm against the helpers).
    tmp_s = data[data.columns[1]] / 100

    ci_mean = confidence_interval_with_mean(tmp_s)
    # The bounds are compared directly with the undivided frequency column
    # below, so they are expected on that same scale - TODO confirm.
    lower, upper = ci_mean[0], ci_mean[1]
    display(Markdown(f"Confidence Interval for {key} is : [{lower}%, {upper}%]"))

    data["Confidence Interval"] = confidence_interval_for_proportion(tmp_s)
    data["z_score"] = z_score_for_series(tmp_s)
    display(data)

    above = data[rel_f] > upper
    below = data[rel_f] < lower
    inside = (data[rel_f] >= lower) & (data[rel_f] <= upper)

    display(Markdown(f"Overrepresented records from column: {key}"))
    display(data.loc[above])
    display(Markdown(f"Normal represented records from column : {key}"))
    display(data.loc[inside])
    display(Markdown(f"Underrepresented records from column : {key}"))
    display(data.loc[below])
Confidence Interval for resource_type_labels is : [0.23%, 2.67%]
| | resource_type_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | General publications | 41.48 | [29.85, 53.11] | 7.77 |
| 1 | Study | 11.45 | [3.94, 18.96] | 1.94 |
| 2 | Judicial information | 5.81 | [0.29, 11.33] | 0.85 |
| 3 | Notice | 4.75 | [0.0, 9.77] | 0.64 |
| 4 | Corrigendum | 4.67 | [0.0, 9.65] | 0.62 |
| ... | ... | ... | ... | ... |
| 64 | Annual report | 0.02 | [0.0, 0.35] | -0.28 |
| 65 | Draft implementing decision | 0.02 | [0.0, 0.35] | -0.28 |
| 66 | Proposal for an implementing regulation | 0.02 | [0.0, 0.35] | -0.28 |
| 67 | Executive summary of the fitness check | 0.02 | [0.0, 0.35] | -0.28 |
| 68 | Green Paper | 0.02 | [0.0, 0.35] | -0.28 |
69 rows × 4 columns
Overrepresented records from column: resource_type_labels
| | resource_type_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | General publications | 41.48 | [29.85, 53.11] | 7.77 |
| 1 | Study | 11.45 | [3.94, 18.96] | 1.94 |
| 2 | Judicial information | 5.81 | [0.29, 11.33] | 0.85 |
| 3 | Notice | 4.75 | [0.0, 9.77] | 0.64 |
| 4 | Corrigendum | 4.67 | [0.0, 9.65] | 0.62 |
| 5 | Regulation | 2.77 | [0.0, 6.64] | 0.26 |
Normal represented records from column : resource_type_labels
| | resource_type_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 6 | Staff working document | 2.29 | [0.0, 5.82] | 0.16 |
| 7 | Executive summary of a study | 2.19 | [0.0, 5.64] | 0.14 |
| 8 | Decision | 2.15 | [0.0, 5.57] | 0.14 |
| 9 | Implementing regulation | 2.04 | [0.0, 5.38] | 0.11 |
| 10 | Implementing decision | 1.83 | [0.0, 4.99] | 0.07 |
| 11 | Report | 1.77 | [0.0, 4.88] | 0.06 |
| 12 | Communication | 1.59 | [0.0, 4.54] | 0.03 |
| 13 | Announcements | 1.54 | [0.0, 4.45] | 0.02 |
| 14 | Opinion | 1.20 | [0.0, 3.77] | -0.05 |
| 15 | Annex to a study | 1.15 | [0.0, 3.67] | -0.06 |
| 16 | Recommendation | 1.14 | [0.0, 3.64] | -0.06 |
| 17 | Delegated regulation | 1.08 | [0.0, 3.52] | -0.07 |
| 18 | Proposal for a regulation | 0.93 | [0.0, 3.19] | -0.10 |
| 19 | Proposal for a decision | 0.76 | [0.0, 2.81] | -0.13 |
| 20 | Own-initiative opinion | 0.63 | [0.0, 2.5] | -0.16 |
| 21 | Texts adopted | 0.51 | [0.0, 2.19] | -0.18 |
| 22 | Directive | 0.50 | [0.0, 2.16] | -0.18 |
| 23 | Recommendation for a recommendation | 0.49 | [0.0, 2.14] | -0.19 |
| 24 | Summary | 0.49 | [0.0, 2.14] | -0.19 |
| 25 | Proposal for an implementing decision | 0.43 | [0.0, 1.97] | -0.20 |
| 26 | Council conclusions | 0.43 | [0.0, 1.97] | -0.20 |
| 27 | Evaluation | 0.24 | [0.0, 1.39] | -0.23 |
Underrepresented records from column : resource_type_labels
| | resource_type_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 28 | Exploratory opinion | 0.21 | [0.0, 1.29] | -0.24 |
| 29 | Position | 0.21 | [0.0, 1.29] | -0.24 |
| 30 | Statement of reasons | 0.21 | [0.0, 1.29] | -0.24 |
| 31 | Resolution | 0.21 | [0.0, 1.29] | -0.24 |
| 32 | Addendum | 0.18 | [0.0, 1.18] | -0.25 |
| 33 | Notification | 0.18 | [0.0, 1.18] | -0.25 |
| 34 | Communication concerning the position of the C... | 0.17 | [0.0, 1.14] | -0.25 |
| 35 | Proposal for a recommendation | 0.17 | [0.0, 1.14] | -0.25 |
| 36 | Declaration | 0.16 | [0.0, 1.1] | -0.25 |
| 37 | Proposal for a directive | 0.15 | [0.0, 1.06] | -0.25 |
| 38 | Summary of impact assessment | 0.15 | [0.0, 1.06] | -0.25 |
| 39 | Executive summary of the evaluation | 0.14 | [0.0, 1.02] | -0.25 |
| 40 | Impact assessment | 0.13 | [0.0, 0.98] | -0.26 |
| 41 | Inception impact assessment | 0.13 | [0.0, 0.98] | -0.26 |
| 42 | Amended proposal for a regulation | 0.10 | [0.0, 0.85] | -0.26 |
| 43 | Recruitment | 0.09 | [0.0, 0.8] | -0.26 |
| 44 | Decision adopted by bodies created by internat... | 0.08 | [0.0, 0.75] | -0.27 |
| 45 | Evaluation study | 0.08 | [0.0, 0.75] | -0.27 |
| 46 | Guideline | 0.08 | [0.0, 0.75] | -0.27 |
| 47 | Evaluation roadmap | 0.08 | [0.0, 0.75] | -0.27 |
| 48 | Joint communication | 0.08 | [0.0, 0.75] | -0.27 |
| 49 | Joint report | 0.07 | [0.0, 0.69] | -0.27 |
| 50 | Amended proposal for a decision | 0.07 | [0.0, 0.69] | -0.27 |
| 51 | Draft supplementary and amending budget | 0.07 | [0.0, 0.69] | -0.27 |
| 52 | Supplementary and amending budget | 0.07 | [0.0, 0.69] | -0.27 |
| 53 | Roadmap | 0.07 | [0.0, 0.69] | -0.27 |
| 54 | Statement | 0.05 | [0.0, 0.58] | -0.27 |
| 55 | International agreement | 0.05 | [0.0, 0.58] | -0.27 |
| 56 | Additional opinion | 0.03 | [0.0, 0.44] | -0.28 |
| 57 | Executive summary of an evaluation study | 0.03 | [0.0, 0.44] | -0.28 |
| 58 | White Paper | 0.03 | [0.0, 0.44] | -0.28 |
| 59 | Recommendation for a decision | 0.02 | [0.0, 0.35] | -0.28 |
| 60 | Fitness check | 0.02 | [0.0, 0.35] | -0.28 |
| 61 | Delegated directive | 0.02 | [0.0, 0.35] | -0.28 |
| 62 | Draft implementing regulation | 0.02 | [0.0, 0.35] | -0.28 |
| 63 | Synopsis report of the public consultation | 0.02 | [0.0, 0.35] | -0.28 |
| 64 | Annual report | 0.02 | [0.0, 0.35] | -0.28 |
| 65 | Draft implementing decision | 0.02 | [0.0, 0.35] | -0.28 |
| 66 | Proposal for an implementing regulation | 0.02 | [0.0, 0.35] | -0.28 |
| 67 | Executive summary of the fitness check | 0.02 | [0.0, 0.35] | -0.28 |
| 68 | Green Paper | 0.02 | [0.0, 0.35] | -0.28 |
Confidence Interval for eurovoc_concept_labels is : [0.24%, 0.62%]
| | eurovoc_concept_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | epidemic | 12.97 | [8.66, 17.28] | 8.44 |
| 1 | coronavirus disease | 10.62 | [6.66, 14.58] | 6.86 |
| 2 | innovation | 9.78 | [5.97, 13.59] | 6.29 |
| 3 | infectious disease | 6.49 | [3.33, 9.65] | 4.08 |
| 4 | public health | 4.36 | [1.74, 6.98] | 2.65 |
| ... | ... | ... | ... | ... |
| 228 | European Social Fund | 0.01 | [0.0, 0.14] | -0.28 |
| 229 | European security | 0.01 | [0.0, 0.14] | -0.28 |
| 230 | distribution of EU funding | 0.01 | [0.0, 0.14] | -0.28 |
| 231 | external border of the EU | 0.01 | [0.0, 0.14] | -0.28 |
| 232 | green economy | 0.01 | [0.0, 0.14] | -0.28 |
233 rows × 4 columns
Overrepresented records from column: eurovoc_concept_labels
| | eurovoc_concept_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | epidemic | 12.97 | [8.66, 17.28] | 8.44 |
| 1 | coronavirus disease | 10.62 | [6.66, 14.58] | 6.86 |
| 2 | innovation | 9.78 | [5.97, 13.59] | 6.29 |
| 3 | infectious disease | 6.49 | [3.33, 9.65] | 4.08 |
| 4 | public health | 4.36 | [1.74, 6.98] | 2.65 |
| 5 | air transport | 4.15 | [1.59, 6.71] | 2.51 |
| 6 | labour market | 3.98 | [1.47, 6.49] | 2.39 |
| 7 | research and development | 3.90 | [1.41, 6.39] | 2.34 |
| 8 | health risk | 3.48 | [1.13, 5.83] | 2.05 |
| 9 | economic consequence | 3.06 | [0.85, 5.27] | 1.77 |
| 10 | aid to undertakings | 2.95 | [0.78, 5.12] | 1.70 |
| 11 | EU financing | 2.90 | [0.75, 5.05] | 1.66 |
| 12 | social impact | 2.22 | [0.33, 4.11] | 1.21 |
| 13 | disease prevention | 2.13 | [0.28, 3.98] | 1.15 |
| 14 | public awareness campaign | 1.84 | [0.11, 3.57] | 0.95 |
| 15 | working conditions | 1.78 | [0.08, 3.48] | 0.91 |
| 16 | health control | 1.73 | [0.06, 3.4] | 0.88 |
| 17 | health policy | 1.65 | [0.01, 3.29] | 0.82 |
| 18 | crisis management | 1.07 | [0.0, 2.39] | 0.43 |
| 19 | medical research | 1.06 | [0.0, 2.37] | 0.43 |
| 20 | applied research | 1.03 | [0.0, 2.33] | 0.41 |
| 21 | mass media | 1.01 | [0.0, 2.29] | 0.39 |
| 22 | free movement of persons | 0.84 | [0.0, 2.01] | 0.28 |
| 23 | occupational health | 0.83 | [0.0, 1.99] | 0.27 |
| 24 | free movement of workers | 0.80 | [0.0, 1.94] | 0.25 |
| 25 | social well-being | 0.68 | [0.0, 1.74] | 0.17 |
Normal represented records from column : eurovoc_concept_labels
| | eurovoc_concept_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 26 | economic activity | 0.53 | [0.0, 1.46] | 0.07 |
| 27 | vaccination | 0.51 | [0.0, 1.42] | 0.06 |
| 28 | tourism | 0.49 | [0.0, 1.39] | 0.04 |
| 29 | social media | 0.49 | [0.0, 1.39] | 0.04 |
| 30 | quality of life | 0.49 | [0.0, 1.39] | 0.04 |
| 31 | socioeconomic conditions | 0.48 | [0.0, 1.37] | 0.04 |
| 32 | disinformation | 0.47 | [0.0, 1.35] | 0.03 |
| 33 | freedom of movement | 0.42 | [0.0, 1.25] | -0.01 |
| 34 | e-Health | 0.41 | [0.0, 1.23] | -0.01 |
| 35 | protective equipment | 0.41 | [0.0, 1.23] | -0.01 |
| 36 | organisation of health care | 0.40 | [0.0, 1.21] | -0.02 |
| 37 | social situation | 0.39 | [0.0, 1.19] | -0.03 |
| 38 | living conditions | 0.36 | [0.0, 1.13] | -0.05 |
| 39 | patient's rights | 0.35 | [0.0, 1.11] | -0.05 |
| 40 | aid to disadvantaged groups | 0.33 | [0.0, 1.07] | -0.07 |
| 41 | distance learning | 0.33 | [0.0, 1.07] | -0.07 |
| 42 | health service | 0.33 | [0.0, 1.07] | -0.07 |
| 43 | health legislation | 0.31 | [0.0, 1.02] | -0.08 |
| 44 | epidemiology | 0.26 | [0.0, 0.91] | -0.11 |
| 45 | economic support | 0.25 | [0.0, 0.89] | -0.12 |
| 46 | social participation | 0.25 | [0.0, 0.89] | -0.12 |
| 47 | disease surveillance | 0.24 | [0.0, 0.87] | -0.13 |
Underrepresented records from column : eurovoc_concept_labels
| | eurovoc_concept_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 48 | illness | 0.21 | [0.0, 0.8] | -0.15 |
| 49 | working environment | 0.21 | [0.0, 0.8] | -0.15 |
| 50 | teleworking | 0.18 | [0.0, 0.72] | -0.17 |
| 51 | public hygiene | 0.15 | [0.0, 0.65] | -0.19 |
| 52 | social sciences | 0.13 | [0.0, 0.59] | -0.20 |
| ... | ... | ... | ... | ... |
| 228 | European Social Fund | 0.01 | [0.0, 0.14] | -0.28 |
| 229 | European security | 0.01 | [0.0, 0.14] | -0.28 |
| 230 | distribution of EU funding | 0.01 | [0.0, 0.14] | -0.28 |
| 231 | external border of the EU | 0.01 | [0.0, 0.14] | -0.28 |
| 232 | green economy | 0.01 | [0.0, 0.14] | -0.28 |
185 rows × 4 columns
Confidence Interval for subject_matter_labels is : [0.52%, 0.99%]
| | subject_matter_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | Transport | 9.46 | [4.49, 14.43] | 6.32 |
| 1 | Public health | 7.95 | [3.35, 12.55] | 5.22 |
| 2 | Economic policy | 4.55 | [1.01, 8.09] | 2.76 |
| 3 | Internal market - Principles | 4.45 | [0.95, 7.95] | 2.68 |
| 4 | External relations | 3.88 | [0.6, 7.16] | 2.27 |
| ... | ... | ... | ... | ... |
| 128 | European Investment Bank (EIB) | 0.03 | [0.0, 0.32] | -0.52 |
| 129 | Space | 0.03 | [0.0, 0.32] | -0.52 |
| 130 | General provisions | 0.03 | [0.0, 0.32] | -0.52 |
| 131 | Tourism | 0.03 | [0.0, 0.32] | -0.52 |
| 132 | Administrative cooperation | 0.03 | [0.0, 0.32] | -0.52 |
133 rows × 4 columns
Overrepresented records from column: subject_matter_labels
| | subject_matter_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | Transport | 9.46 | [4.49, 14.43] | 6.32 |
| 1 | Public health | 7.95 | [3.35, 12.55] | 5.22 |
| 2 | Economic policy | 4.55 | [1.01, 8.09] | 2.76 |
| 3 | Internal market - Principles | 4.45 | [0.95, 7.95] | 2.68 |
| 4 | External relations | 3.88 | [0.6, 7.16] | 2.27 |
| 5 | Environment | 3.63 | [0.45, 6.81] | 2.09 |
| 6 | State aids | 3.50 | [0.38, 6.62] | 1.99 |
| 7 | Economic and Monetary Union | 3.47 | [0.36, 6.58] | 1.97 |
| 8 | European Free Trade Association (EFTA) | 3.08 | [0.14, 6.02] | 1.69 |
| 9 | Approximation of laws | 2.72 | [0.0, 5.48] | 1.43 |
| 10 | Technical barriers | 2.62 | [0.0, 5.33] | 1.35 |
| 11 | Employment | 2.60 | [0.0, 5.3] | 1.34 |
| 12 | Consumer protection | 2.32 | [0.0, 4.88] | 1.14 |
| 13 | Budget | 2.27 | [0.0, 4.8] | 1.10 |
| 14 | Competition | 1.93 | [0.0, 4.27] | 0.85 |
| 15 | Concentrations between undertakings | 1.90 | [0.0, 4.22] | 0.83 |
| 16 | Financial provisions | 1.90 | [0.0, 4.22] | 0.83 |
| 17 | Provisions governing the Institutions | 1.72 | [0.0, 3.93] | 0.70 |
| 18 | Economic, social and territorial cohesion | 1.62 | [0.0, 3.77] | 0.63 |
| 19 | Economic and monetary policy | 1.57 | [0.0, 3.68] | 0.59 |
| 20 | Research and technological development | 1.50 | [0.0, 3.57] | 0.54 |
| 21 | Veterinary legislation | 1.42 | [0.0, 3.43] | 0.48 |
| 22 | Social provisions | 1.39 | [0.0, 3.38] | 0.46 |
| 23 | Principles, objectives and tasks of the Treaties | 1.15 | [0.0, 2.96] | 0.29 |
| 24 | Agricultural structures | 1.11 | [0.0, 2.89] | 0.26 |
| 25 | Area of freedom, security and justice | 1.01 | [0.0, 2.71] | 0.19 |
| 26 | Technology | 1.00 | [0.0, 2.69] | 0.18 |
Normal represented records from column : subject_matter_labels
| | subject_matter_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 27 | Freedom of establishment | 0.98 | [0.0, 2.65] | 0.17 |
| 28 | Foodstuffs | 0.92 | [0.0, 2.54] | 0.12 |
| 29 | Taxation | 0.87 | [0.0, 2.45] | 0.09 |
| 30 | Information and verification | 0.82 | [0.0, 2.35] | 0.05 |
| 31 | Safety at work and elsewhere | 0.79 | [0.0, 2.29] | 0.03 |
| 32 | Energy | 0.79 | [0.0, 2.29] | 0.03 |
| 33 | Free movement of persons | 0.70 | [0.0, 2.12] | -0.04 |
| 34 | Value added tax | 0.65 | [0.0, 2.02] | -0.07 |
| 35 | Common organisation of agricultural markets | 0.65 | [0.0, 2.02] | -0.07 |
| 36 | Justice and home affairs | 0.62 | [0.0, 1.95] | -0.10 |
| 37 | Border checks | 0.61 | [0.0, 1.93] | -0.10 |
| 38 | Social Policy | 0.61 | [0.0, 1.93] | -0.10 |
| 39 | Common foreign and security policy | 0.61 | [0.0, 1.93] | -0.10 |
| 40 | Health and safety | 0.59 | [0.0, 1.89] | -0.12 |
| 41 | Education, vocational training and youth | 0.56 | [0.0, 1.83] | -0.14 |
| 42 | Regional policy | 0.56 | [0.0, 1.83] | -0.14 |
| 43 | Accession | 0.52 | [0.0, 1.74] | -0.17 |
Underrepresented records from column : subject_matter_labels
| | subject_matter_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 44 | Protective measures | 0.47 | [0.0, 1.63] | -0.20 |
| 45 | Cooperation | 0.47 | [0.0, 1.63] | -0.20 |
| 46 | Investments | 0.46 | [0.0, 1.61] | -0.21 |
| 47 | Industrial policy | 0.41 | [0.0, 1.5] | -0.25 |
| 48 | Commercial policy | 0.36 | [0.0, 1.38] | -0.28 |
| ... | ... | ... | ... | ... |
| 128 | European Investment Bank (EIB) | 0.03 | [0.0, 0.32] | -0.52 |
| 129 | Space | 0.03 | [0.0, 0.32] | -0.52 |
| 130 | General provisions | 0.03 | [0.0, 0.32] | -0.52 |
| 131 | Tourism | 0.03 | [0.0, 0.32] | -0.52 |
| 132 | Administrative cooperation | 0.03 | [0.0, 0.32] | -0.52 |
89 rows × 4 columns
Confidence Interval for directory_codes_labels is : [0.39%, 0.62%]
| | directory_codes_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | Health protection | 5.09 | [2.03, 8.15] | 5.34 |
| 1 | State aids and other subsidies | 4.46 | [1.58, 7.34] | 4.61 |
| 2 | Economic and monetary union | 4.28 | [1.46, 7.1] | 4.40 |
| 3 | Member countries of the European Free Trade As... | 3.96 | [1.24, 6.68] | 4.03 |
| 4 | Conventions with non-member countries | 3.38 | [0.86, 5.9] | 3.35 |
| ... | ... | ... | ... | ... |
| 193 | General provisions | 0.04 | [0.0, 0.32] | -0.54 |
| 194 | Aeronautical industry | 0.04 | [0.0, 0.32] | -0.54 |
| 195 | Aid to developing countries | 0.04 | [0.0, 0.32] | -0.54 |
| 196 | Nuclear research | 0.02 | [0.0, 0.22] | -0.57 |
| 197 | European countries | 0.02 | [0.0, 0.22] | -0.57 |
198 rows × 4 columns
Overrepresented records from column: directory_codes_labels
| | directory_codes_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | Health protection | 5.09 | [2.03, 8.15] | 5.34 |
| 1 | State aids and other subsidies | 4.46 | [1.58, 7.34] | 4.61 |
| 2 | Economic and monetary union | 4.28 | [1.46, 7.1] | 4.40 |
| 3 | Member countries of the European Free Trade As... | 3.96 | [1.24, 6.68] | 4.03 |
| 4 | Conventions with non-member countries | 3.38 | [0.86, 5.9] | 3.35 |
| 5 | Protection of health and safety | 3.23 | [0.77, 5.69] | 3.18 |
| 6 | Motor vehicles | 2.94 | [0.59, 5.29] | 2.84 |
| 7 | Technical and safety conditions | 2.84 | [0.53, 5.15] | 2.72 |
| 8 | Employment and unemployment | 2.84 | [0.53, 5.15] | 2.72 |
| 9 | Budget | 2.65 | [0.41, 4.89] | 2.50 |
| 10 | Market operation | 2.50 | [0.33, 4.67] | 2.33 |
| 11 | Concentrations | 2.42 | [0.28, 4.56] | 2.23 |
| 12 | Animal health and zootechnics | 2.19 | [0.15, 4.23] | 1.96 |
| 13 | Financial and budgetary provisions | 2.13 | [0.12, 4.14] | 1.89 |
| 14 | Instruments of economic policy | 2.07 | [0.09, 4.05] | 1.82 |
| 15 | Economic policy | 1.86 | [0.0, 3.74] | 1.58 |
| 16 | General | 1.81 | [0.0, 3.67] | 1.52 |
| 17 | Information technology, telecommunications and... | 1.65 | [0.0, 3.42] | 1.33 |
| 18 | Principles, objectives and tasks of the Treaties | 1.59 | [0.0, 3.33] | 1.26 |
| 19 | Monitoring of atmospheric pollution | 1.23 | [0.0, 2.77] | 0.85 |
| 20 | Basic provisions | 1.17 | [0.0, 2.67] | 0.78 |
| 21 | Chemicals, industrial risk and biotechnology | 1.08 | [0.0, 2.52] | 0.67 |
| 22 | General provisions and programmes | 1.00 | [0.0, 2.39] | 0.58 |
| 23 | Research and technological development | 1.00 | [0.0, 2.39] | 0.58 |
| 24 | Crossing external borders | 0.98 | [0.0, 2.35] | 0.55 |
| 25 | Foodstuffs | 0.98 | [0.0, 2.35] | 0.55 |
| 26 | Dangerous substances | 0.90 | [0.0, 2.22] | 0.46 |
| 27 | Banks | 0.90 | [0.0, 2.22] | 0.46 |
| 28 | Turnover tax/VAT | 0.88 | [0.0, 2.18] | 0.44 |
| 29 | Other European countries | 0.88 | [0.0, 2.18] | 0.44 |
| 30 | General principles, programmes and statistics | 0.88 | [0.0, 2.18] | 0.44 |
| 31 | Common agricultural policy mechanisms | 0.86 | [0.0, 2.15] | 0.41 |
| 32 | Stock exchanges and other securities markets | 0.83 | [0.0, 2.09] | 0.38 |
| 33 | Coordination of structural instruments | 0.83 | [0.0, 2.09] | 0.38 |
| 34 | Asian countries | 0.79 | [0.0, 2.02] | 0.33 |
| 35 | General social provisions | 0.75 | [0.0, 1.95] | 0.29 |
| 36 | Provisions governing the institutions | 0.75 | [0.0, 1.95] | 0.29 |
| 37 | Mediterranean countries | 0.67 | [0.0, 1.81] | 0.19 |
| 38 | Other sectors for approximation of laws | 0.67 | [0.0, 1.81] | 0.19 |
| 39 | Free movement of persons | 0.63 | [0.0, 1.73] | 0.15 |
| 40 | Education and training | 0.63 | [0.0, 1.73] | 0.15 |
Normal represented records from column : directory_codes_labels
| | directory_codes_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 41 | Proprietary medicinal products | 0.60 | [0.0, 1.68] | 0.11 |
| 42 | Working conditions | 0.58 | [0.0, 1.64] | 0.09 |
| 43 | Common Foreign and Security Policy | 0.58 | [0.0, 1.64] | 0.09 |
| 44 | Dissemination of information | 0.54 | [0.0, 1.56] | 0.04 |
| 45 | Cooperation with international and non-governm... | 0.52 | [0.0, 1.52] | 0.02 |
| 46 | Safety at work | 0.46 | [0.0, 1.4] | -0.05 |
| 47 | Social conditions | 0.46 | [0.0, 1.4] | -0.05 |
| 48 | Water protection and management | 0.46 | [0.0, 1.4] | -0.05 |
| 49 | Statistics | 0.46 | [0.0, 1.4] | -0.05 |
| 50 | The Near and Middle East | 0.42 | [0.0, 1.32] | -0.10 |
| 51 | Council | 0.42 | [0.0, 1.32] | -0.10 |
| 52 | Countries in transition | 0.42 | [0.0, 1.32] | -0.10 |
| 53 | Protection of economic interests | 0.40 | [0.0, 1.28] | -0.12 |
Underrepresented records from column : directory_codes_labels
| | directory_codes_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 54 | Commission | 0.38 | [0.0, 1.24] | -0.15 |
| 55 | Social policy | 0.38 | [0.0, 1.24] | -0.15 |
| 56 | Police and judicial cooperation in criminal an... | 0.38 | [0.0, 1.24] | -0.15 |
| 57 | Programmes | 0.38 | [0.0, 1.24] | -0.15 |
| 58 | Plant health | 0.38 | [0.0, 1.24] | -0.15 |
| ... | ... | ... | ... | ... |
| 193 | General provisions | 0.04 | [0.0, 0.32] | -0.54 |
| 194 | Aeronautical industry | 0.04 | [0.0, 0.32] | -0.54 |
| 195 | Aid to developing countries | 0.04 | [0.0, 0.32] | -0.54 |
| 196 | Nuclear research | 0.02 | [0.0, 0.22] | -0.57 |
| 197 | European countries | 0.02 | [0.0, 0.22] | -0.57 |
144 rows × 4 columns
Confidence Interval for author_labels is : [0.36%, 2.14%]
| | author_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | European Commission | 34.42 | [24.01, 44.83] | 8.19 |
| 1 | Council of the European Union | 9.77 | [3.26, 16.28] | 2.10 |
| 2 | Court of Justice | 6.09 | [0.85, 11.33] | 1.19 |
| 3 | European Parliament | 5.34 | [0.41, 10.27] | 1.01 |
| 4 | Secretariat-General | 3.97 | [0.0, 8.25] | 0.67 |
| ... | ... | ... | ... | ... |
| 75 | European Union Intellectual Property Office | 0.03 | [0.0, 0.41] | -0.30 |
| 76 | Single Resolution Board | 0.03 | [0.0, 0.41] | -0.30 |
| 77 | Directorate-General for Structural Reform Support | 0.03 | [0.0, 0.41] | -0.30 |
| 78 | Committee on the Internal Market and Consumer ... | 0.01 | [0.0, 0.23] | -0.31 |
| 79 | Committee on Budgetary Control | 0.01 | [0.0, 0.23] | -0.31 |
80 rows × 4 columns
Overrepresented records from column: author_labels
| | author_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 0 | European Commission | 34.42 | [24.01, 44.83] | 8.19 |
| 1 | Council of the European Union | 9.77 | [3.26, 16.28] | 2.10 |
| 2 | Court of Justice | 6.09 | [0.85, 11.33] | 1.19 |
| 3 | European Parliament | 5.34 | [0.41, 10.27] | 1.01 |
| 4 | Secretariat-General | 3.97 | [0.0, 8.25] | 0.67 |
| 5 | Directorate-General for Health and Food Safety | 3.85 | [0.0, 8.07] | 0.64 |
| 6 | Directorate-General for Mobility and Transport | 3.71 | [0.0, 7.85] | 0.61 |
| 7 | General Court | 3.03 | [0.0, 6.79] | 0.44 |
| 8 | Directorate-General for Translation | 2.52 | [0.0, 5.95] | 0.31 |
| 9 | Directorate-General for Competition | 2.32 | [0.0, 5.62] | 0.26 |
| 10 | Directorate-General for Economic and Financial... | 2.20 | [0.0, 5.41] | 0.23 |
| 11 | Directorate-General for Agriculture and Rural ... | 2.15 | [0.0, 5.33] | 0.22 |
Normal represented records from column : author_labels
| | author_labels | Relative freq | Confidence Interval | z_score |
|---|---|---|---|---|
| 12 | EFTA Surveillance Authority | 2.04 | [0.0, 5.14] | 0.19 |
| 13 | European Economic and Social Committee | 1.88 | [0.0, 4.86] | 0.16 |
| 14 | Directorate-General for Internal Market, Indus... | 1.48 | [0.0, 4.13] | 0.06 |
| 15 | European Committee of the Regions | 1.09 | [0.0, 3.37] | -0.04 |
| 16 | Directorate-General for Employment, Social Aff... | 1.03 | [0.0, 3.24] | -0.05 |
| 17 | Directorate-General for Migration and Home Aff... | 1.02 | [0.0, 3.22] | -0.06 |
| 18 | Directorate-General for Environment | 0.94 | [0.0, 3.05] | -0.08 |
| 19 | Directorate-General for Taxation and Customs U... | 0.60 | [0.0, 2.29] | -0.16 |
| 20 | Directorate-General for Financial Stability, F... | 0.58 | [0.0, 2.24] | -0.17 |
| 21 | Directorate-General for Budget | 0.58 | [0.0, 2.24] | -0.17 |
| 22 | Directorate-General for Communications Network... | 0.57 | [0.0, 2.22] | -0.17 |
| 23 | European Central Bank | 0.57 | [0.0, 2.22] | -0.17 |
| 24 | Directorate-General for Justice and Consumers | 0.56 | [0.0, 2.2] | -0.17 |
| 25 | Directorate-General for Climate Action | 0.53 | [0.0, 2.12] | -0.18 |
| 26 | European Court of Auditors | 0.45 | [0.0, 1.92] | -0.20 |
| 27 | Directorate-General for Regional and Urban Policy | 0.45 | [0.0, 1.92] | -0.20 |
| 28 | Directorate-General for Research and Innovation | 0.44 | [0.0, 1.89] | -0.20 |
| 29 | Directorate-General for Energy | 0.40 | [0.0, 1.78] | -0.21 |
Underrepresented records from column : author_labels
| author_labels | Relative freq | Confidence Interval | z_score | |
|---|---|---|---|---|
| 30 | European Personnel Selection Office | 0.32 | [0.0, 1.56] | -0.23 |
| 31 | Eurostat | 0.26 | [0.0, 1.38] | -0.24 |
| 32 | Directorate-General for Neighbourhood and Enla... | 0.26 | [0.0, 1.38] | -0.24 |
| 33 | EFTA Court | 0.24 | [0.0, 1.31] | -0.25 |
| 34 | Commission for Social Policy, Education, Emplo... | 0.23 | [0.0, 1.28] | -0.25 |
| 35 | Representatives of the Governments of the Memb... | 0.21 | [0.0, 1.21] | -0.26 |
| 36 | Directorate-General for Trade | 0.20 | [0.0, 1.18] | -0.26 |
| 37 | European Systemic Risk Board | 0.19 | [0.0, 1.14] | -0.26 |
| 38 | Commission for Natural Resources | 0.19 | [0.0, 1.14] | -0.26 |
| 39 | Commission for Territorial Cohesion Policy and... | 0.19 | [0.0, 1.14] | -0.26 |
| 40 | Directorate-General for Education, Youth, Spor... | 0.16 | [0.0, 1.04] | -0.27 |
| 41 | Poland | 0.16 | [0.0, 1.04] | -0.27 |
| 42 | Committee on Transport and Tourism | 0.16 | [0.0, 1.04] | -0.27 |
| 43 | Directorate-General for International Cooperat... | 0.16 | [0.0, 1.04] | -0.27 |
| 44 | Commission for the Environment, Climate Change... | 0.15 | [0.0, 1.0] | -0.27 |
| 45 | Commission for Economic Policy | 0.13 | [0.0, 0.92] | -0.28 |
| 46 | Directorate-General for European Civil Protect... | 0.13 | [0.0, 0.92] | -0.28 |
| 47 | Standing Committee of the EFTA States | 0.13 | [0.0, 0.92] | -0.28 |
| 48 | Directorate-General for Maritime Affairs and F... | 0.13 | [0.0, 0.92] | -0.28 |
| 49 | Committee on the Environment, Public Health an... | 0.12 | [0.0, 0.88] | -0.28 |
| 50 | EU–Switzerland Joint Committee | 0.11 | [0.0, 0.84] | -0.28 |
| 51 | European External Action Service | 0.11 | [0.0, 0.84] | -0.28 |
| 52 | European Securities and Markets Authority | 0.11 | [0.0, 0.84] | -0.28 |
| 53 | European Data Protection Supervisor | 0.11 | [0.0, 0.84] | -0.28 |
| 54 | Committee on Economic and Monetary Affairs | 0.11 | [0.0, 0.84] | -0.28 |
| 55 | European Anti-Fraud Office | 0.11 | [0.0, 0.84] | -0.28 |
| 56 | Committee on Industry, Research and Energy | 0.11 | [0.0, 0.84] | -0.28 |
| 57 | European Union | 0.08 | [0.0, 0.7] | -0.29 |
| 58 | Committee on Culture and Education | 0.08 | [0.0, 0.7] | -0.29 |
| 59 | European Investment Bank | 0.08 | [0.0, 0.7] | -0.29 |
| 60 | Directorate-General for Human Resources and Se... | 0.08 | [0.0, 0.7] | -0.29 |
| 61 | Committee on Employment and Social Affairs | 0.07 | [0.0, 0.65] | -0.29 |
| 62 | UK Task Force | 0.05 | [0.0, 0.54] | -0.30 |
| 63 | Indonesia | 0.05 | [0.0, 0.54] | -0.30 |
| 64 | Commission for Citizenship, Governance, Instit... | 0.05 | [0.0, 0.54] | -0.30 |
| 65 | Committee on Agriculture and Rural Development | 0.05 | [0.0, 0.54] | -0.30 |
| 66 | Directorate-General for Justice | 0.05 | [0.0, 0.54] | -0.30 |
| 67 | Administrative Commission for the Coordination... | 0.04 | [0.0, 0.48] | -0.30 |
| 68 | Committee on Constitutional Affairs | 0.03 | [0.0, 0.41] | -0.30 |
| 69 | Joint Committee | 0.03 | [0.0, 0.41] | -0.30 |
| 70 | Directorate-General for Defence Industry and S... | 0.03 | [0.0, 0.41] | -0.30 |
| 71 | Shift2Rail Joint Undertaking | 0.03 | [0.0, 0.41] | -0.30 |
| 72 | European Atomic Energy Community | 0.03 | [0.0, 0.41] | -0.30 |
| 73 | India | 0.03 | [0.0, 0.41] | -0.30 |
| 74 | Committee on Regional Development | 0.03 | [0.0, 0.41] | -0.30 |
| 75 | European Union Intellectual Property Office | 0.03 | [0.0, 0.41] | -0.30 |
| 76 | Single Resolution Board | 0.03 | [0.0, 0.41] | -0.30 |
| 77 | Directorate-General for Structural Reform Support | 0.03 | [0.0, 0.41] | -0.30 |
| 78 | Committee on the Internal Market and Consumer ... | 0.01 | [0.0, 0.23] | -0.31 |
| 79 | Committee on Budgetary Control | 0.01 | [0.0, 0.23] | -0.31 |
Confidence Interval for internal_comments is : [-3.81%, 32.38%]
| internal_comments | Relative freq | Confidence Interval | z_score | |
|---|---|---|---|---|
| 0 | MAN2 | 58.84 | [22.38, 95.3] | 1.97 |
| 1 | COVID19 | 39.27 | [3.09, 75.45] | 1.11 |
| 2 | BREXIT | 1.54 | [0.0, 10.66] | -0.56 |
| 3 | MAN2, COVID19 | 0.21 | [0.0, 3.6] | -0.62 |
| 4 | 552 | 0.05 | [0.0, 1.71] | -0.63 |
| 5 | COVID-19 | 0.05 | [0.0, 1.71] | -0.63 |
| 6 | COOVID19 | 0.03 | [0.0, 1.31] | -0.63 |
Overrepresented records from column: internal_comments
| internal_comments | Relative freq | Confidence Interval | z_score | |
|---|---|---|---|---|
| 0 | MAN2 | 58.84 | [22.38, 95.3] | 1.97 |
| 1 | COVID19 | 39.27 | [3.09, 75.45] | 1.11 |
Normal represented records from column : internal_comments
| internal_comments | Relative freq | Confidence Interval | z_score | |
|---|---|---|---|---|
| 2 | BREXIT | 1.54 | [0.0, 10.66] | -0.56 |
| 3 | MAN2, COVID19 | 0.21 | [0.0, 3.6] | -0.62 |
| 4 | 552 | 0.05 | [0.0, 1.71] | -0.63 |
| 5 | COVID-19 | 0.05 | [0.0, 1.71] | -0.63 |
| 6 | COOVID19 | 0.03 | [0.0, 1.31] | -0.63 |
Underrepresented records from column : internal_comments
| internal_comments | Relative freq | Confidence Interval | z_score |
|---|---|---|---|
# Run the per-column collision check on the categorical columns of the dataset only.
# NOTE(review): what counts as a "collision" is defined in
# eda_wrangling.collision_analysis, which is not visible here — confirm there.
class_collision_in_columns(eurlex[CATEGORICAL_COLUMNS])
Collision in column : resource_type_labels
Collision in column : eurovoc_concept_labels
Collision in column : subject_matter_labels
Collision in column : directory_codes_labels
Collision in column : author_labels
Collision in column : internal_comments
# Run the collision check on the categorical-column subset of the dataset as a whole.
# NOTE(review): presumably this looks for class labels shared across different
# columns — verify against eda_wrangling.collision_analysis.
class_collision(eurlex[CATEGORICAL_COLUMNS])
Collision in dataframe